setwd('/Users/emmatimminsschiffman/Documents/Dissertation/proteomics/DB post-genome/combined OT and QE') spec.dat<-read.csv('spec counts OT and QE.csv', header=T, row.names=1) #2504 proteins total treatment<-read.csv('treatment groupings.csv',header=T, row.names=1) #remove proteins that are expressed in fewer than half the oysters spec.dat1<-drop.var(spec.dat, min.fo=8) str(spec.dat1) #2164 proteins remaining #remove proteins that are super abundant spec.dat2<-drop.var(spec.dat, max.po=0.95) str(spec.dat2) #no proteins remaining #drop proteins with little variation spec.dat3<-drop.var(spec.dat, min.cv=5) str(spec.dat3) #all proteins remaining #data transformation log(x+1) specdat.tra<-(spec.dat+1) specdat.tra<-data.trans(specdat.tra, method='log', plot=F) specdat.tra1<-(spec.dat1+1) specdat.tra1<-data.trans(specdat.tra1, method='log', plot=F) #NMDS specdat.nmds<-metaMDS(specdat.tra, distance='bray', k=2, trymax=100, autotransform=F) vec.specdat<-envfit(specdat.nmds$points, specdat.tra, perm=100) ordiplot(specdat.nmds, choices=c(1,2), type='text', display='sites', xlab='Axis 1', ylab='Axis 2') plot(vec.specdat, p.max=0.01, col='blue', cex=0.5) ordihull(specdat.nmds, treatment[,3], label=T, col='magenta') specdat.nmds1<-metaMDS(specdat.tra1, distance='bray', k=2, trymax=100, autotransform=F) vec.specdat1<-envfit(specdat.nmds1$points, specdat.tra1, perm=100) ordiplot(specdat.nmds1, choices=c(1,2), type='text', display='sites', xlab='Axis 1', ylab='Axis 2') plot(vec.specdat1, p.max=0.01, col='blue', cex=0.5) ordihull(specdat.nmds1, treatment[,3], label=T, col='magenta') #looks really similar to NMDS with all proteins #ANOSIM spec.row<-data.stand(spec.dat, method='total', margin='row', plot=F) spec.d<-vegdist(spec.row, 'bray') #anosim for pCO2 only spec.anosim1<-anosim(spec.d, grouping=treatment[,1]) summary(spec.anosim1) #R=-0.01786, p=0.571 #MS only spec.anosim2<-anosim(spec.d, grouping=treatment[,2]) summary(spec.anosim2) #R=-0.005022, p=0.478 #all treatments spec.anosim3<-anosim(spec.d, grouping=treatment[,3]) summary(spec.anosim3) #R=-0.002604, p=0.488 #GO Terms go.dat<-read.csv('spec counts GO OT and QE.csv', header=T, row.names=1) godat.tra<-(go.dat+1) godat.tra<-data.trans(godat.tra,method='log', plot=F) godat.nmds<-metaMDS(godat.tra, distance='bray', k=2, trymax=100, autotransform=F) vec.godat<-envfit(godat.nmds$points, godat.tra, perm=100) ordiplot(godat.nmds, choices=c(1,2), type='text', display='sites', xlab='Axis 1', ylab='Axis 2') plot(vec.godat, p.max=0.01, col='blue', cex=0.5) ordihull(godat.nmds, treatment[,3], label=T, col='magenta') go.row<-data.stand(go.dat, method='total', margin='row', plot=F) go.d<-vegdist(go.row, 'bray') #pco2 only go.anosim1<-anosim(go.d, grouping=treatment[,1]) summary(go.anosim1) #R=0.1094, p=0.115 #MS only go.anosim2<-anosim(go.d, grouping=treatment[,2]) summary(go.anosim2) #R=0.02734, p=0.348 #all treatments go.anosim3<-anosim(go.d, grouping=treatment[,3]) summary(go.anosim3) #R=0.1623, p=0.067 #Histograms of number of peptides sequenced per protein OT.spec<-read.csv('total OT spec counts.csv', header=T, row.names=1) hist(OT.spec$Total, breaks=100, xlab='Peptides Sequenced per Protein', ylab='Number of Proteins', main='Orbitrap', ylim=c(0,500)) QE.spec<-read.csv('total QE spec counts.csv', header=T, row.names=1) hist(QE.spec$Total, breaks=200, xlab='Peptides Sequenced per Protein', ylab='Number of Proteins', main='Q Exactive', ylim=c(0,3000)) #what does it look like when we get rid of all proteins that only have 5 or fewer peptide hits? QE.spec2<-read.csv('total QE spec counts abridged.csv', header=T, row.names=1) hist(QE.spec2$Total, breaks=100, xlab='Peptides Sequenced per Protein', ylab='Number of Proteins', main='Q Exactive')